home *** CD-ROM | disk | FTP | other *** search
Wrap
/*
License: This source code may not be used in other applications whether they be personal, commercial, free, or paid without written permission from Read It Later.

/////////
DEVELOPER API: readitlaterlist.com/api/
/////////

If you would like to customize Read It Later or build an application that works with Read it Later take a look at the READ IT LATER OPEN API: http://readitlaterlist.com/api/

Suggestions for additions to Read It Later are VERY welcome. A large number of user suggestions have been implemented.
Please let me know of any additional features you are seeking at: http://readitlaterlist.com/support/

Thanks
*/

Components.utils.import("resource://gre/modules/XPCOMUtils.jsm");

/**
 * Downloads one web page plus the images and stylesheets it references so the
 * page can be stored for offline reading.
 *
 * Life cycle as driven by the methods below: init() -> start() -> markup is
 * fetched -> a worker thread rewrites asset references and asks the main
 * thread to fetch each asset -> checkIfFinished() writes the markup once all
 * assets are accounted for (or the timeout forces the issue) -> finish().
 */
function RILwebDownloader()
{
    this.type = 2; // handed to OFFLINE.itemIsDone(); meaning is defined by that API
    this.timeout = 30 * 1000; // passed to APP.setTimeout(); presumably milliseconds
    this.maxActiveRequests = 4; // increasing this will speed up downloading but will reduce Firefox performance
    this.maxImages = 300;
    this.maxStylesheets = 30;

    this.requests = [];          // fileDownloader objects waiting to be started
    this.activeRequests = 0;     // started but not yet finished
    this.assetQueueCount = 0;    // assets requested but not yet finished
    this.imageCount = 0;
    this.stylesheetCount = 0;

    this.threads = [];
    this.imagesQueue = [];
    this.stylesheetQueue = [];

    this.dupeCheckAbsolute = {}; // absolute url -> true once a download was queued
    this.retainDomains = {};
}

// class definition
RILwebDownloader.prototype = {

    // properties required for XPCOM registration:
    classDescription: "Read It Later Web Page Downloader Javascript XPCOM Component",

    classID:          Components.ID("{c570f4f0-aaf4-11de-8a39-0800200c9a66}"),
    contractID:       "@ril.ideashower.com/rilwebdownloader;1",

    QueryInterface: XPCOMUtils.generateQI([Components.interfaces.nsIRILwebDownloader]),

    //////////////////////////////////////////

    // setup

    /**
     * Drops every reference this object holds. Called at the end of finish();
     * after this point the collections are null, so late callbacks must check
     * `this.finished` before touching them.
     */
    dealloc : function()
    {
        this.APP = null;
        this.ASSETS = null;
        this.JSON = null;
        this.markup = null;
        this.markupPath = null;
        this.requests = null;
        this.threads = null;
        this.imagesQueue = null;
        this.stylesheetQueue = null;
        this.dupeCheckAbsolute = null;
        this.retainDomains = null;
    },

    /**
     * Binds the downloader to one item and resolves the services it needs.
     *
     * @param itemId  id used to derive the item's asset folder
     * @param url     address of the page to download
     */
    init : function(itemId, url)
    {
        this.itemId = itemId;
        this.url = url;

        this.APP = Components.classes['@ril.ideashower.com/rildelegate;1'].getService().wrappedJSObject;
        this.ASSETS = Components.classes['@ril.ideashower.com/rilassetmanager;1'].createInstance(Components.interfaces.nsIRILassetManager);
        this.ASSETS.init();
        this.JSON = Components.classes["@mozilla.org/dom/json;1"].createInstance(Components.interfaces.nsIJSON);

        this.markupPath = this.ASSETS.folderPathForItemId( itemId ) + 'web.html';

        // Install the prototypes of the two nested constructors
        this.processorPrototype();
        this.fileDownloaderPrototype();
    },

    /**
     * Begins the download by requesting the page markup.
     *
     * @param threadId  opaque id echoed back through OFFLINE.itemIsDone()
     */
    start : function(threadId)
    {
        //this.APP.d('start downloading ' + this.url);
        this.threadId = threadId;
        this.request(this.url, false, this, this.markupCallback);
    },


    // Handling file and assets requests

    /**
     * Queues a download.
     *
     * @param url       address to fetch
     * @param isBinary  true to fetch with a user-defined charset override
     * @param delegate  `this` value for the callback
     * @param callback  invoked with the fileDownloader when it finishes
     * @param itemInfo  optional asset description attached to the request
     * @param wait      when truthy the request is queued but not started here
     * @return the queued fileDownloader
     */
    request : function(url, isBinary, delegate, callback, itemInfo, wait)
    {
        //this.APP.d('request');
        let request = new this.fileDownloader(url, isBinary, delegate, callback);
        request.itemInfo = itemInfo;
        this.requests.push(request);

        if (!wait)
            this.popRequest();

        return request;
    },

    /**
     * Starts the next queued request if a slot is free.
     */
    popRequest : function()
    {
        // Strictly less-than: `<=` let maxActiveRequests + 1 requests run at once
        if (this.requests && this.activeRequests < this.maxActiveRequests && this.requests.length > 0 && !this.finished)
        {
            this.activeRequests++;
            let request = this.requests.shift();
            if (request)
                request.start();
        }
    },

    /**
     * Main-thread entry point used by the processor thread to ask for one
     * asset download.
     *
     * @param data  {itemInfo, type} where type 1 is an image and 2 a stylesheet
     */
    requestAsset : function(data)
    {
        //this.APP.d('requestAsset');

        // Worker callbacks can still be queued after finish() released
        // everything in dealloc(); ignore them instead of dereferencing null.
        if (this.finished || !this.dupeCheckAbsolute || !this.retainDomains)
            return;

        let itemInfo = data.itemInfo;
        let type = data.type;

        // Log for retain count regardless if we open a connection below
        this.addRetainDomain( itemInfo.assetDomain );

        // If still under max asset caps (should this be a byte level cap rather than #)?
        let underCap = (type == 1 && this.imageCount < this.maxImages) ||
                       (type == 2 && this.stylesheetCount < this.maxStylesheets);
        if (!underCap)
            return;

        // Make sure the asset doesn't already exist and isn't already queued,
        // so the same file is never downloaded twice
        if ( this.dupeCheckAbsolute[ itemInfo.absolute ] || itemInfo.assetExists )
        //if ( this.dupeCheckAbsolute[ itemInfo.absolute ] ) //for testing to skip exists check
            return;

        let request = this.request(itemInfo.absolute, (type==1), this, type==1?this.imageAssetFinished:this.stylesheetAssetFinished, itemInfo, true);

        // Add to queue
        if (type == 1)
        {
            this.imagesQueue.push(request);
            this.imageCount++;
        }
        else if (type == 2)
        {
            this.stylesheetQueue.push(request);
            this.stylesheetCount++;
        }
        this.assetQueueCount++;
        //dump("\n " + this.stylesheetCount + ' + ' + this.imageCount + ' = ' + this.assetQueueCount);

        // Add to checks before starting: a request that fails synchronously
        // calls back into this object from inside popRequest()
        this.dupeCheckAbsolute[ itemInfo.absolute ] = true;

        // Start the connection
        this.popRequest();
    },

    /**
     * Releases one active-request slot and starts the next queued request.
     */
    requestFinished : function()
    {
        this.activeRequests--;
        this.popRequest();
    },


    // threading

    /**
     * Runs a processor on a newly created thread and remembers the thread so
     * shutdownThreads() can stop it later.
     *
     * @param func  processor instance (must provide run/QueryInterface)
     */
    runOnThread : function( func )
    {
        func.defineMainThreadCallbackPrototype();

        let thread = Components.classes["@mozilla.org/thread-manager;1"].getService().newThread(0);
        thread.dispatch( func , thread.DISPATCH_NORMAL);
        this.threads.push(thread);
    },


    // process

    /**
     * Called when the page markup request completes. Fails the whole item if
     * the download failed, otherwise hands the markup to a processor thread.
     */
    markupCallback : function(downloader)
    {
        // this.APP.d('markupCallback ' + this.url);
        this.requestFinished();

        if (!downloader.success)
            return this.finish(false);

        this.imagesProcessed = false;
        this.stylesheetsProcessed = false;

        this.runOnThread( new this.processor('processMarkup', {markup:downloader.data,url:this.url}, this) );
    },

    /**
     * Main-thread callback from the processor with the rewritten page markup.
     */
    markupProcessed : function(markup)
    {
        //this.APP.d('markupProcessed ' + this.url);
        this.imagesProcessed = true;
        this.stylesheetsProcessed = true;
        this.markup = markup;
        this.checkIfFinished();
    },


    // asset call


    // Processor Thread
    // Handles markup and spawns asset downloads

    /**
     * Constructor for the runnable dispatched by runOnThread().
     *
     * @param action    'processMarkup' or 'processStylesheet'
     * @param vars      properties copied onto the processor (markup, url, ...)
     * @param delegate  the RILwebDownloader that receives main-thread callbacks
     */
    processor : function(action, vars, delegate)
    {
        // dump("\nprocessor");
        this.action = action;
        this.delegate = delegate; // is this safe as long as the thread doesn't touch it, just passes it? - otherwise send an id that corresponds to the webDownloader and save it to the OFFLINE object
        for(let i in vars)
        {
            this[i] = vars[i];
        }

        // Because only one specific item of content is ever processed at a time, we dropped the need to do contentId lookups with this
        //this.dupeCheckAbsolute = {}; // absolute is handled in the scope of RILwebDownloader
        this.dupeCheckLiteral = {}; // literals are handled in the scope per file (per processor thread)
    },

    /**
     * Installs the prototype shared by all processor instances.
     */
    processorPrototype : function()
    {
        // RUN IN THREAD
        this.processor.prototype = {

            /** nsIRunnable entry point: dispatches on this.action. */
            run : function()
            {
                //dump("\nprocessor " + this.url);
                this.init();

                switch(this.action)
                {
                    case('processMarkup'):
                        this.processMarkup();
                        break;
                    case('processStylesheet'):
                        this.processStylesheet();
                        break;
                }
            },

            /** Creates this processor's own asset manager and JSON helper. */
            init : function()
            {
                this.ASSETS = Components.classes['@ril.ideashower.com/rilassetmanager;1'].createInstance(Components.interfaces.nsIRILassetManager);
                this.ASSETS.init();
                this.JSON = Components.classes["@mozilla.org/dom/json;1"].createInstance(Components.interfaces.nsIJSON);
            },

            /** Rewrites asset references and links in the page markup. */
            processMarkup : function()
            {
                //dump("\nprocessMarkup");

                // Begin Scan for images
                this.markup = this.processImages( this.markup, 'markup', 1);
                this.markup = this.processImages( this.markup, 'markup', 2 );

                // Begin Scan for stylesheets
                this.markup = this.processStylesheets( this.markup, 'markup', 1);
                this.markup = this.processStylesheets( this.markup, 'markup', 2);

                // Replace relative links
                this.markup = this.processLinks( this.markup );

                this.runOnMain('markupProcessed', this.markup);
            },

            /** Rewrites @import and background references in a CSS file. */
            processStylesheet : function()
            {
                //dump("\processStylesheet");
                this.markup = this.processStylesheets(this.markup, this.url, 2, this.itemInfo);
                this.markup = this.processImages(this.markup, this.url, 2, this.itemInfo);

                // Strip any remaining items with absolute urls
                this.markup = this.markup.replace(/(['"\(])?https?:\/\//gi, '$1/UNREPLACEDABSOLUTE/');

                this.runOnMain('stylesheetProcessed', {markup:this.markup, itemInfo:this.itemInfo});
            },

            /**
             * Finds image references and passes each one to processAsset().
             *
             * @param content    text to scan
             * @param contentId  forwarded to processAsset()
             * @param type       1 = img/input src attributes, otherwise css background urls
             * @param itemInfo   optional; its `absolute` becomes the base url
             * @return content with the processed references rewritten
             */
            processImages : function( content, contentId, type, itemInfo )
            {
                //dump("\processImages");
                this.imagesProcessed = false;

                let regex = type == 1 ?
                                /<(\s)?(img|input) ([^>]*)?src=["']([^"']*)["']/gi :
                                /background(-image)?:[^;}\(]*url\(['"]?([^'"\(\)]*)['"]?\)/gi;
                let literalGroup = type == 1 ? 4 : 2;
                let baseURL = itemInfo ? itemInfo.absolute : null;

                // Take markup and scan for img tags.
                // NOTE(review): content is rewritten while the /g regex keeps its
                // lastIndex, so a replacement that changes the length shifts where
                // the scan resumes.
                let match = regex.exec(content);
                while(match)
                {
                    content = this.processAsset(match[ literalGroup ], content, contentId, 1, baseURL);

                    // Next match
                    match = regex.exec(content);
                }

                this.imagesProcessed = true;

                return content;
            },

            /**
             * Finds stylesheet references and passes each one to processAsset().
             *
             * @param content    text to scan
             * @param contentId  forwarded to processAsset()
             * @param type       1 = <link href> tags, otherwise css @import rules
             * @param itemInfo   optional; its `absolute` becomes the base url
             * @return content with the processed references rewritten
             */
            processStylesheets : function(content, contentId, type, itemInfo)
            {
                //dump("\processStylesheets");
                this.stylesheetsProcessed = false;

                if (content)
                {
                    let literal, processIt;
                    let regex = type == 1 ?
                                    /<(\s)?link ([^>]*)?href=["']([^"']*)["']([^>]*)?/gi :
                                    /@import\s*(url\()?['"]?([^'"\(\)]*)['"]?/gi;
                    let baseURL = itemInfo ? itemInfo.absolute : null;

                    // Take markup and scan for css links and imports
                    let match = regex.exec(content);
                    while(match)
                    {
                        processIt = false;

                        if (type == 1)
                        {
                            literal = match[ 3 ];
                            if (literal)
                            {
                                // There is an href, now check if it has a rel="stylesheet" before or after it.
                                // Both sides are matched case-insensitively.
                                let before = match[2];
                                let after = match[4];
                                if ( (before && before.match(/stylesheet/i)) || (after && after.match(/stylesheet/i)) )
                                    processIt = true;
                            }
                        }
                        else
                        {
                            literal = match[2];
                            if (literal)
                                processIt = true;
                        }

                        if (processIt)
                            content = this.processAsset(literal, content, contentId, 2, baseURL);

                        // Next match
                        match = regex.exec(content);
                    }

                    this.stylesheetsProcessed = true;
                }

                return content;
            },

            /**
             * Rewrites relative <a href> targets to absolute urls.
             *
             * @param content  markup to rewrite
             * @return rewritten markup, or the untouched input if anything throws
             */
            processLinks : function(content)
            {
                //dump("\processLinks");
                if (content)
                {
                    // Declared outside the try block: `let` is block scoped, so the
                    // catch handler could not see it when it lived inside the try.
                    let searchContent = content;

                    try
                    {
                        let literal, absolute;
                        let regex = /<(\s)?a ([^>]*)?href=["']([^"']*)["']([^>]*)?/gi;

                        // Take markup and scan for links
                        let match = regex.exec(searchContent);
                        while(match)
                        {
                            literal = match[ 3 ];

                            if (literal.length && !literal.match(/^(\#|https?:\/)/i))
                            {
                                absolute = this.ASSETS.getAbsoluteFromRelative( literal, this.url );
                                if (absolute && absolute != literal)
                                {
                                    content = content.replace( literal , absolute );
                                }
                            }

                            // Next match
                            match = regex.exec(searchContent);
                        }
                    }
                    catch(e)
                    {
                        Components.utils.reportError(e);
                        content = searchContent; // fall back to the unmodified markup
                    }
                }

                return content;
            },

            /**
             * Resolves one asset reference, rewrites it to its local path and
             * asks the main thread to download it.
             *
             * @param literal    the reference exactly as written in content
             * @param content    text containing the reference
             * @param contentId  unused here (kept for callers)
             * @param type       1 = image, 2 = stylesheet
             * @param baseURL    base for resolving; falls back to this.url
             * @return content, rewritten when the literal was seen for the first time
             */
            processAsset : function(literal, content, contentId, type, baseURL)
            {
                //dump("\processAsset");
                try
                {
                    // Check to make sure it's not an asset path (already processed)
                    if (!literal || !literal.match(/\S/) || literal.match('RIL_assets'))
                        return content;

                    // Get a path set for literal
                    let itemInfoJSON = this.ASSETS.pathsForLiteral( literal , !baseURL ? this.url : baseURL, baseURL!=null, type==2?2:false);
                    let itemInfo = itemInfoJSON ? this.JSON.decode(itemInfoJSON) : null;
                    if (!itemInfo)
                        return content;

                    // -- Replace literals with absolutes
                    // dupes should be detected on a specific content basis, not across all files
                    // we removed the need for this.dupeCheckLiteral[contentId] because only one thread is ever processing a specific file
                    if ( !this.dupeCheckLiteral[ itemInfo.literal] )
                    {
                        // Replace literal in markup with relative path.
                        // NOTE(review): a string pattern replaces only the first
                        // occurrence, and the dupe check skips later ones.
                        content = content.replace( itemInfo.literal , itemInfo.assetRelativePath);

                        // Add to dupe array
                        this.dupeCheckLiteral[ itemInfo.literal ] = true;
                    }

                    this.runOnMain( 'requestAsset', {itemInfo:itemInfo, type:type} );
                }
                catch(e)
                {
                    Components.utils.reportError(e);
                }

                return content;
            },

            //

            QueryInterface : function(iid)
            {
                if (iid.equals(Components.interfaces.nsIRunnable) || iid.equals(Components.interfaces.nsISupports))
                {
                    return this;
                }
                throw Components.results.NS_ERROR_NO_INTERFACE;
            },

            /**
             * Invokes delegate[selector](argument) on the main thread.
             */
            runOnMain : function(selector, argument)
            {
                let main = Components.classes["@mozilla.org/thread-manager;1"].getService().mainThread;
                // The flag lives on the event target; the processor itself has no
                // DISPATCH_NORMAL property.
                main.dispatch(new this.mainThreadCallback(this.delegate, selector, argument), main.DISPATCH_NORMAL);
            },

            /** Runnable carrying one delegate call back to the main thread. */
            mainThreadCallback : function(delegate, selector, argument)
            {
                this.delegate = delegate;
                this.selector = selector;
                this.argument = argument;
            },

            /** Installs the prototype of mainThreadCallback. */
            defineMainThreadCallbackPrototype : function()
            {
                this.mainThreadCallback.prototype = {

                    run: function()
                    {
                        try
                        {
                            this.delegate[this.selector].call(this.delegate, this.argument);
                        }
                        catch(err)
                        {
                            Components.utils.reportError(err);
                        }
                    },

                    QueryInterface: function(iid)
                    {
                        if (iid.equals(Components.interfaces.nsIRunnable) || iid.equals(Components.interfaces.nsISupports))
                        {
                            return this;
                        }
                        throw Components.results.NS_ERROR_NO_INTERFACE;
                    }
                };
            }

        };
    },

    /**
     * Callback for a finished image download: stores the bytes and counts the
     * asset as done.
     */
    imageAssetFinished : function( downloader )
    {
        if (this.finished) return;

        this.requestFinished();

        try
        {
            if (downloader.success && downloader.data)
            {
                // Save the image to a file - no delegate callback, if it works it works
                this.APP.OFFLINE.write(downloader.itemInfo.assetPath, downloader.data , true);
            }
        }
        catch(e)
        {
            Components.utils.reportError(e);
        }

        this.assetFinished();
    },

    /**
     * Callback for a finished stylesheet download. A successful download is
     * handed to a processor thread and counted as done later, in
     * stylesheetProcessed(); a failed one is counted as done right away.
     */
    stylesheetAssetFinished : function( downloader )
    {
        if (this.finished) return;

        //this.APP.d('stylesheetAssetFinished ' + this.url);
        this.requestFinished();

        try
        {
            if (downloader.success && downloader.data)
            {
                this.imagesProcessed = false;
                this.stylesheetsProcessed = false;

                // Process the css file
                this.runOnThread( new this.processor('processStylesheet', {
                    markup:downloader.data,
                    url:downloader.itemInfo.absolute,
                    itemInfo:downloader.itemInfo
                }, this) );

                // stylesheetProcessed() calls assetFinished() for this asset;
                // calling it here as well decremented assetQueueCount twice.
                return;
            }
        }
        catch(e)
        {
            Components.utils.reportError(e);
            return;
        }

        this.assetFinished();
    },

    /**
     * Main-thread callback from the processor with a rewritten stylesheet.
     *
     * @param data  {markup, itemInfo}
     */
    stylesheetProcessed : function(data)
    {
        if (this.finished) return;

        //this.APP.d('stylesheetProcessed ' + this.url);
        this.imagesProcessed = true;
        this.stylesheetsProcessed = true;

        // Save the css to a file - no delegate callback, if it works it works
        this.APP.OFFLINE.write( data.itemInfo.assetPath, data.markup, true );

        this.assetFinished();
    },

    /**
     * Counts one asset as done and re-evaluates completion.
     */
    assetFinished : function()
    {
        //this.APP.d('assetFinished ' + this.url);
        this.assetQueueCount--;
        this.checkIfFinished();
    },

    /**
     * Writes the final markup once nothing is outstanding, otherwise re-arms
     * the timeout.
     *
     * @param force  true to finish even though assets are still outstanding
     * @return true when finishing (or already finished), false when still waiting
     */
    checkIfFinished : function(force)
    {
        //this.APP.d('checkIfFinished ' + this.assetQueueCount + ' | ' + this.imagesProcessed + ' | ' + this.stylesheetsProcessed);
        if (this.finished || this.finishing) return true;

        if (force || (this.assetQueueCount == 0 && this.imagesProcessed && this.stylesheetsProcessed))
        {
            this.finishing = true;
            //this.APP.d('finishing');

            // Do remaining markup cleanup - strip absolutes
            if (this.markup)
                this.markup = this.markup.replace(/([\s"'])(background|src)=["']https?:([^"']*)["']/gi, '$1$2=""');

            // Add content type if it isn't set
            if (this.markup && !this.markup.match(/http-equiv="content-type/i))
                this.markup += '<meta http-equiv="content-type" content="text/html; charset=UTF-8">';

            // Save markup to file
            this.APP.OFFLINE.write( this.markupPath, this.markup, false, this, 'finish');

            return true;
        }

        // reset timeout
        if (this.timeoutTO) this.timeoutTO = this.APP.clearTimeout( this.timeoutTO );
        this.timeoutTO = this.APP.setTimeout(this.timedOut, this.timeout, this, false, this.timeoutTO);

        return false;
    },

    /**
     * Reports the outcome to OFFLINE.itemIsDone(), stops the worker threads
     * and releases all references. Only the first call has any effect.
     *
     * @param success     whether the page was saved
     * @param statusCode  optional; defaults to 1 on success and -1 on failure
     */
    finish : function(success, statusCode)
    {
        //this.APP.d('finish? ' + this.url);
        if (this.finished) return;

        if (this.timeoutTO) this.timeoutTO = this.APP.clearTimeout( this.timeoutTO );

        this.finished = true;
        this.success = success;
        this.statusCode = statusCode ? statusCode : success ? 1 : -1;
        this.APP.OFFLINE.itemIsDone(this.itemId, this.type, this.threadId, this.success, this.statusCode, this.retainDomains);
        this.shutdownThreads();
        this.dealloc();
    },

    /**
     * Timeout handler armed by checkIfFinished().
     */
    timedOut : function()
    {
        //dump("\n -- timing out.. " + this.assetQueueCount);
        if (this.finished) return false;

        this.cancel(true);

        // decide if we should return an error or just skip waiting assets
        if (!this.imagesProcessed || !this.stylesheetsProcessed)
        {
            this.finish(false);
        }
        else
        {
            this.checkIfFinished(true); // force it to finish
        }
    },

    /**
     * Stops the timeout and the worker threads.
     *
     * @param soft  when truthy the downloader is left unfinished so the caller
     *              can still call finish()/checkIfFinished()
     */
    cancel : function(soft)
    {
        //dump("\n -- cancelling.. " + this.assetQueueCount);

        // APP is null once dealloc() has run
        if (this.APP)
            this.APP.clearTimeout( this.timeoutTO );

        this.shutdownThreads();

        if (!soft)
        {
            this.finished = true;
        }
    },

    /**
     * Shuts down every worker thread created by runOnThread() and resets the
     * list.
     */
    shutdownThreads : function()
    {
        try
        {
            let threads = this.threads || []; // null after dealloc()
            for (let i = 0; i < threads.length; i++)
            {
                if (threads[i])
                {
                    threads[i].shutdown();
                    threads[i] = null;
                }
            }

            this.threads = [];
        }
        catch(e)
        {
            Components.utils.reportError(e);
        }
    },

    /** Records a domain in the retain map (keyed and valued by the path). */
    addRetainDomain : function(path)
    {
        this.retainDomains[ path ] = path;
    },

    /** @return the retain map encoded as JSON */
    getRetainDomains : function()
    {
        return this.JSON.encode(this.retainDomains);
    },


    // ---


    //

    /**
     * Constructor for a single download.
     *
     * @param url       address to fetch
     * @param isBinary  true to override the mime type for binary data
     * @param delegate  `this` value for the callback
     * @param callback  invoked with this downloader when it finishes
     */
    fileDownloader : function(url, isBinary, delegate, callback)
    {
        this.url = url;
        this.isBinary = isBinary;
        this.delegate = delegate;
        this.callback = callback;
        this.data = "";
    },

    /**
     * Installs the prototype shared by all fileDownloader instances.
     */
    fileDownloaderPrototype : function()
    {
        this.fileDownloader.prototype = {

            /**
             * Starts the download. A missing url or a request that cannot be
             * started is reported as a failed download, so the delegate always
             * hears back and can release its active-request slot.
             */
            start : function()
            {
                //dump("\nstart file: " + Components.classes["@mozilla.org/thread-manager;1"].getService().isMainThread + this.url);
                if (!this.url)
                {
                    this.finish(false);
                    return;
                }

                try
                {
                    this.startXMLhttpRequest();
                }
                catch(e)
                {
                    dump("\nfileDownloader Error x1 : " + e);
                    //Components.utils.reportError(e);
                    this.finish(false);
                }
            },

            // XMLhttpRequest - used for text pages
            startXMLhttpRequest : function()
            {
                var self = this;

                this.request = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(Components.interfaces.nsIXMLHttpRequest);
                this.request.open("GET", this.url, true);
                this.request.onreadystatechange = function(e){ self.onReadyStateChange.call(self, e); };
                if (this.isBinary)
                {
                    this.request.overrideMimeType('text/plain; charset=x-user-defined');
                }
                this.request.send();
            },

            /** Finishes the download once the request reaches readyState 4. */
            onReadyStateChange : function(e)
            {
                //dump("\nonReadyStateChange: " + this.request.readyState + ' | ' + this.url);

                /* - TODO - implement this
                if (this.request.readyState == 2 && this.request.channel.originalURI.spec != this.request.channel.URI.spec)
                {
                    if ( this.delegate.dupeCheckAbsolute[ this.request.channel.URI.spec ] )
                    {
                        // would either need to make a copy of the file (assuming its already been downloaded)
                        // or would have to update the source's literal with the new location
                        // in that case it would have to know which source to update (css or markup)
                        // best solution would likely be simlinks
                        this.request.abort();
                        dump("\n dupe aborted");
                    }
                    else
                    {
                        // Add it to the checker
                        this.delegate.dupeCheckAbsolute[ this.request.channel.URI.spec ] = true;
                    }
                }
                else*/
                if (this.request.readyState == 4)
                {
                    if (this.request.status == 200)
                    {
                        this.data = this.request.responseText;
                        this.finish(true);
                    }
                    else
                    {
                        this.finish(false);
                    }
                }
            },

            // Finish

            /**
             * Records the outcome and notifies the delegate exactly once.
             *
             * @param success  whether the download produced data
             */
            finish : function(success)
            {
                //dump("\nfinish: " + this.url);
                if (this.finished) return; // never report the same download twice

                this.finished = true;
                this.success = success;
                this.callback.call(this.delegate, this);
            }

        };
    }

};

var components = [RILwebDownloader];

/** XPCOM module entry point. */
function NSGetModule(compMgr, fileSpec)
{
    return XPCOMUtils.generateModule(components);
}